パッケージのインストールと読み込み

# Install the package from GitHub
# install.packages("remotes", dependencies = TRUE)  # needed only the first time
remotes::install_github("covid19datahub/R")
## Skipping install of 'COVID19' from a github remote, the SHA1 (8943fa78) has not changed since last install.
##   Use `force = TRUE` to force installation
# データを読み込むために毎回必要


# load the package
library("COVID19")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(DT)

データを呼び出す

# Country-level data for the whole world
x <- covid19()
## 
##   Data Science for Social Impact research group, University of Pretoria
##   (2020), https://github.com
## 
##   Public Health Agency, Sweden (2020), https://oppnadata.se
## 
##   Ministery of Health, Slovenia (2020), https://www.gov.si
## 
##   Open Government Data, Latvia (2020), https://data.gov.lv
## 
##   Open Government Data, Liechtenstein (2020), https://github.com
## 
##   Ministero della Salute, Italia (2020), https://github.com
## 
##   COVID19-India API (2020), https://www.covid19india.org
## 
##   OpenCOVID19 France (2020), https://github.com
## 
##   Wikipedia (2020), https://www.wikipedia.org
## 
##   Ministery of Health of Czech Republic (2020),
##   https://onemocneni-aktualne.mzcr.cz
## 
##   Ministerio de Salud y Protección Social de Colombia (2020),
##   https://www.datos.gov.co
## 
##   Swiss Federal Statistical Office (2018), https://www.bfs.admin.ch
## 
##   Public Health Infobase, Government of Canada (2020),
##   https://health-infobase.canada.ca
## 
##   Epistat, Belgian Infectious Diseases (2020),
##   https://epistat.sciensano.be
## 
##   Open Government Data, Austria (2020),
##   https://info.gesundheitsministerium.at
## 
##   CIA - Central Intelligence Agency (2020), https://www.cia.gov
## 
##   Our World in Data (2020), https://github.com
## 
##   World Bank Open Data (2018), https://data.worldbank.org
## 
##   Hale Thomas, Sam Webster, Anna Petherick, Toby Phillips, and Beatriz
##   Kira (2020). Oxford COVID-19 Government Response Tracker, Blavatnik
##   School of Government.
## 
##   Johns Hopkins Center for Systems Science and Engineering (2020),
##   https://github.com
## 
##   Guidotti, E., Ardia, D., (2020), "COVID-19 Data Hub", Working paper,
##   doi: 10.13140/RG.2.2.11649.81763.
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
## 
## To hide the data sources use 'verbose = FALSE'.
# State/prefecture-level data for the whole world
# x <- covid19(level = 2)

# City/municipality-level data for Italy and the United States
# x <- covid19(c("Italy","US"), level = 3)

世界銀行のデータとマージ(結合)

# Named vector: output column name -> World Bank indicator code.
wb <- c(gdp = "NY.GDP.MKTP.CD", hosp_beds = "SH.MED.BEDS.ZS")
# Download the country-level data again, this time passing the World Bank
# indicators so they are merged into the result.
x <- covid19(wb = wb)
## 
##   Data Science for Social Impact research group, University of Pretoria
##   (2020), https://github.com
## 
##   Public Health Agency, Sweden (2020), https://oppnadata.se
## 
##   Ministery of Health, Slovenia (2020), https://www.gov.si
## 
##   Open Government Data, Latvia (2020), https://data.gov.lv
## 
##   Open Government Data, Liechtenstein (2020), https://github.com
## 
##   Ministero della Salute, Italia (2020), https://github.com
## 
##   COVID19-India API (2020), https://www.covid19india.org
## 
##   OpenCOVID19 France (2020), https://github.com
## 
##   Wikipedia (2020), https://www.wikipedia.org
## 
##   Ministery of Health of Czech Republic (2020),
##   https://onemocneni-aktualne.mzcr.cz
## 
##   Ministerio de Salud y Protección Social de Colombia (2020),
##   https://www.datos.gov.co
## 
##   Swiss Federal Statistical Office (2018), https://www.bfs.admin.ch
## 
##   Public Health Infobase, Government of Canada (2020),
##   https://health-infobase.canada.ca
## 
##   Epistat, Belgian Infectious Diseases (2020),
##   https://epistat.sciensano.be
## 
##   Open Government Data, Austria (2020),
##   https://info.gesundheitsministerium.at
## 
##   CIA - Central Intelligence Agency (2020), https://www.cia.gov
## 
##   Our World in Data (2020), https://github.com
## 
##   World Bank Open Data (2018), https://data.worldbank.org
## 
##   Hale Thomas, Sam Webster, Anna Petherick, Toby Phillips, and Beatriz
##   Kira (2020). Oxford COVID-19 Government Response Tracker, Blavatnik
##   School of Government.
## 
##   Johns Hopkins Center for Systems Science and Engineering (2020),
##   https://github.com
## 
##   Guidotti, E., Ardia, D., (2020), "COVID-19 Data Hub", Working paper,
##   doi: 10.13140/RG.2.2.11649.81763.
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
## 
## To hide the data sources use 'verbose = FALSE'.
# Keep only the rows dated 2020-05-17 in a new object called x2.
x2 <- filter(x, date == "2020-05-17")

# Interactive table with copy / CSV / Excel export buttons.
datatable(
  x2,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("copy", "csv", "excel"))
)

Google モビリティデータとの結合

# URL of the Google mobility report CSV passed to covid19().
gmr <- "https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv"
# Download the country-level data with the Google mobility data merged in.
gmr_x <- covid19(gmr = gmr)
## 
##   Data Science for Social Impact research group, University of Pretoria
##   (2020), https://github.com
## 
##   Public Health Agency, Sweden (2020), https://oppnadata.se
## 
##   Ministery of Health, Slovenia (2020), https://www.gov.si
## 
##   Open Government Data, Latvia (2020), https://data.gov.lv
## 
##   Open Government Data, Liechtenstein (2020), https://github.com
## 
##   Ministero della Salute, Italia (2020), https://github.com
## 
##   COVID19-India API (2020), https://www.covid19india.org
## 
##   OpenCOVID19 France (2020), https://github.com
## 
##   Wikipedia (2020), https://www.wikipedia.org
## 
##   Ministery of Health of Czech Republic (2020),
##   https://onemocneni-aktualne.mzcr.cz
## 
##   Ministerio de Salud y Protección Social de Colombia (2020),
##   https://www.datos.gov.co
## 
##   Swiss Federal Statistical Office (2018), https://www.bfs.admin.ch
## 
##   Public Health Infobase, Government of Canada (2020),
##   https://health-infobase.canada.ca
## 
##   Epistat, Belgian Infectious Diseases (2020),
##   https://epistat.sciensano.be
## 
##   Open Government Data, Austria (2020),
##   https://info.gesundheitsministerium.at
## 
##   CIA - Central Intelligence Agency (2020), https://www.cia.gov
## 
##   Our World in Data (2020), https://github.com
## 
##   World Bank Open Data (2018), https://data.worldbank.org
## 
##   Hale Thomas, Sam Webster, Anna Petherick, Toby Phillips, and Beatriz
##   Kira (2020). Oxford COVID-19 Government Response Tracker, Blavatnik
##   School of Government.
## 
##   Johns Hopkins Center for Systems Science and Engineering (2020),
##   https://github.com
## 
##   Guidotti, E., Ardia, D., (2020), "COVID-19 Data Hub", Working paper,
##   doi: 10.13140/RG.2.2.11649.81763.
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
## 
## To hide the data sources use 'verbose = FALSE'.
# Keep only the rows dated 2020-05-07 in a new object called gmr_x2.
# The filter must start from gmr_x (the download that includes the Google
# mobility columns); filtering x would silently drop the mobility data.
gmr_x2 <- gmr_x %>%
  filter(date == "2020-05-07")

# Interactive table with copy / CSV / Excel export buttons.
datatable(
  gmr_x2,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel")
  )
)

Apple モビリティデータとの結合

# URL of the Apple mobility trends CSV (2020-05-15 snapshot), assembled from
# the CDN base path and the file-specific suffix.
amr <- paste0(
  "https://covid19-static.cdn-apple.com/covid19-mobility-data/",
  "2008HotfixDev28/v2/en-us/applemobilitytrends-2020-05-15.csv"
)
# Download the country-level data with the Apple mobility data merged in.
amr_x <- covid19(amr = amr)
## 
##   Data Science for Social Impact research group, University of Pretoria
##   (2020), https://github.com
## 
##   Public Health Agency, Sweden (2020), https://oppnadata.se
## 
##   Ministery of Health, Slovenia (2020), https://www.gov.si
## 
##   Open Government Data, Latvia (2020), https://data.gov.lv
## 
##   Open Government Data, Liechtenstein (2020), https://github.com
## 
##   Ministero della Salute, Italia (2020), https://github.com
## 
##   COVID19-India API (2020), https://www.covid19india.org
## 
##   OpenCOVID19 France (2020), https://github.com
## 
##   Wikipedia (2020), https://www.wikipedia.org
## 
##   Ministery of Health of Czech Republic (2020),
##   https://onemocneni-aktualne.mzcr.cz
## 
##   Ministerio de Salud y Protección Social de Colombia (2020),
##   https://www.datos.gov.co
## 
##   Swiss Federal Statistical Office (2018), https://www.bfs.admin.ch
## 
##   Public Health Infobase, Government of Canada (2020),
##   https://health-infobase.canada.ca
## 
##   Epistat, Belgian Infectious Diseases (2020),
##   https://epistat.sciensano.be
## 
##   Open Government Data, Austria (2020),
##   https://info.gesundheitsministerium.at
## 
##   CIA - Central Intelligence Agency (2020), https://www.cia.gov
## 
##   Our World in Data (2020), https://github.com
## 
##   World Bank Open Data (2018), https://data.worldbank.org
## 
##   Hale Thomas, Sam Webster, Anna Petherick, Toby Phillips, and Beatriz
##   Kira (2020). Oxford COVID-19 Government Response Tracker, Blavatnik
##   School of Government.
## 
##   Johns Hopkins Center for Systems Science and Engineering (2020),
##   https://github.com
## 
##   Guidotti, E., Ardia, D., (2020), "COVID-19 Data Hub", Working paper,
##   doi: 10.13140/RG.2.2.11649.81763.
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
## 
## To hide the data sources use 'verbose = FALSE'.
# Keep only the rows dated 2020-05-07 in a new object called amr_x2.
# The filter must start from amr_x (the download that includes the Apple
# mobility columns); filtering x would silently drop the mobility data.
amr_x2 <- amr_x %>%
  filter(date == "2020-05-07")

# Interactive table with copy / CSV / Excel export buttons.
datatable(
  amr_x2,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel")
  )
)

Googleのモビリティデータと世界銀行データの結合

# World Bank indicator codes (named by output column) and the Google
# mobility report URL.
wb <- c(gdp = "NY.GDP.MKTP.CD", hosp_beds = "SH.MED.BEDS.ZS")
gmr <- "https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv"

# Download the country-level data with both sources merged in one call.
wb_gmr_data <- covid19(wb = wb, gmr = gmr)
## 
##   Data Science for Social Impact research group, University of Pretoria
##   (2020), https://github.com
## 
##   Public Health Agency, Sweden (2020), https://oppnadata.se
## 
##   Ministery of Health, Slovenia (2020), https://www.gov.si
## 
##   Open Government Data, Latvia (2020), https://data.gov.lv
## 
##   Open Government Data, Liechtenstein (2020), https://github.com
## 
##   Ministero della Salute, Italia (2020), https://github.com
## 
##   COVID19-India API (2020), https://www.covid19india.org
## 
##   OpenCOVID19 France (2020), https://github.com
## 
##   Wikipedia (2020), https://www.wikipedia.org
## 
##   Ministery of Health of Czech Republic (2020),
##   https://onemocneni-aktualne.mzcr.cz
## 
##   Ministerio de Salud y Protección Social de Colombia (2020),
##   https://www.datos.gov.co
## 
##   Swiss Federal Statistical Office (2018), https://www.bfs.admin.ch
## 
##   Public Health Infobase, Government of Canada (2020),
##   https://health-infobase.canada.ca
## 
##   Epistat, Belgian Infectious Diseases (2020),
##   https://epistat.sciensano.be
## 
##   Open Government Data, Austria (2020),
##   https://info.gesundheitsministerium.at
## 
##   CIA - Central Intelligence Agency (2020), https://www.cia.gov
## 
##   Our World in Data (2020), https://github.com
## 
##   World Bank Open Data (2018), https://data.worldbank.org
## 
##   Hale Thomas, Sam Webster, Anna Petherick, Toby Phillips, and Beatriz
##   Kira (2020). Oxford COVID-19 Government Response Tracker, Blavatnik
##   School of Government.
## 
##   Johns Hopkins Center for Systems Science and Engineering (2020),
##   https://github.com
## 
##   Guidotti, E., Ardia, D., (2020), "COVID-19 Data Hub", Working paper,
##   doi: 10.13140/RG.2.2.11649.81763.
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
## 
## To hide the data sources use 'verbose = FALSE'.
# Keep only the rows dated 2020-05-07 in a new object called wb_gmr_data2.
wb_gmr_data2 <- filter(wb_gmr_data, date == "2020-05-07")

# Interactive table with copy / CSV / Excel export buttons.
datatable(
  wb_gmr_data2,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("copy", "csv", "excel"))
)

データソースのリスト

# Data sources: read the "src" attribute carried by the filtered data.
s <- attr(wb_gmr_data2, "src")

# Show the sources as an interactive table with export buttons.
datatable(
  s,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("copy", "csv", "excel"))
)

* どこから持ってきたデータか. - いっぱいあって見にくいので,テーブル形式で見やすいようにした.

csv形式で直接書き出すときのコード

# Write the filtered data to corona_data.csv (a relative path). row.names is
# left at its default, so row names are written as an extra first column.
write.csv(wb_gmr_data2, "corona_data.csv")

可視化してみる.

# Install if needed
# install.packages("ggplotgui")

#library(ggplotgui)
#ggplot_shiny(wb_gmr_data2)

ちょっと違うところのデータをいじってみましょう.

# install.packages("devtools")  # first time only
# devtools::install_github("RamiKrispin/coronavirus")  # first time only

# The dataset has to be refreshed every session.
# update_dataset() asks interactively whether to update (see the prompt below).
library(coronavirus)
update_dataset()  
## Updates are available on the coronavirus Dev version, do you want to update? n/Y

データの全体を見てみよう

# Show the full coronavirus dataset as an interactive table with
# copy / CSV / Excel export buttons.
datatable(
  coronavirus,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("copy", "csv", "excel"))
)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Summary of the total confirmed cases by country (top 20):

library(dplyr)

# Total confirmed cases per country, largest total first.
confirmed_only <- filter(coronavirus, type == "confirmed")
summary_df <- confirmed_only %>%
  group_by(country) %>%
  summarise(total_cases = sum(cases)) %>%
  arrange(desc(total_cases))

# Interactive table with copy / CSV / Excel export buttons.
datatable(
  summary_df,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("copy", "csv", "excel"))
)

Summary of new cases during the past 24 hours by country and type

library(tidyr)

# Cases reported on the most recent date in the dataset: one row per
# country, one column per case type, largest confirmed count first.
# group_by() + summarise() already reduce the columns to country, type and
# total_cases, so no explicit select() is needed beforehand.
coronavirus %>%
  filter(date == max(date)) %>%
  group_by(country, type) %>%
  summarise(total_cases = sum(cases)) %>%
  pivot_wider(names_from = type, values_from = total_cases) %>%
  arrange(desc(confirmed))
## # A tibble: 188 x 4
## # Groups:   country [188]
##    country        confirmed death recovered
##    <chr>              <int> <int>     <int>
##  1 US                 25050  1632      4333
##  2 Brazil             17126   963      5491
##  3 Russia             10598   113      4696
##  4 Peru                3891   125      1996
##  5 India               3787   104      2289
##  6 United Kingdom      3564   385         4
##  7 Pakistan            3011    64      1185
##  8 Chile               2502    26       959
##  9 Mexico              2437   290      1976
## 10 Saudi Arabia        2307     9      2818
## # … with 178 more rows

Plotting the total cases by type worldwide:

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Stacked area chart of worldwide cumulative active, death and recovered
# counts over time.
coronavirus %>%
  group_by(type, date) %>%
  summarise(total_cases = sum(cases)) %>%
  pivot_wider(names_from = type, values_from = total_cases) %>%
  arrange(date) %>%
  # Daily active = confirmed minus deaths and recoveries; the *_total
  # columns are running sums over the date-ordered rows. mutate() evaluates
  # its arguments in order, so `active` is available to cumsum() below.
  mutate(
    active = confirmed - death - recovered,
    active_total = cumsum(active),
    recovered_total = cumsum(recovered),
    death_total = cumsum(death)
  ) %>%
  plot_ly(
    x = ~date,
    y = ~active_total,
    name = "Active",
    fillcolor = "#1f77b4",
    type = "scatter",
    mode = "none",
    stackgroup = "one"
  ) %>%
  add_trace(
    y = ~death_total,
    name = "Death",
    fillcolor = "#E41317"
  ) %>%
  add_trace(
    y = ~recovered_total,
    name = "Recovered",
    fillcolor = "forestgreen"
  ) %>%
  layout(
    title = "Distribution of Covid19 Cases Worldwide",
    legend = list(x = 0.1, y = 0.9),
    yaxis = list(title = "Number of Cases"),
    # The x-axis title is used to display the data source.
    xaxis = list(title = "Source: Johns Hopkins University Center for Systems Science and Engineering")
  )

Plot the confirmed cases distribution by country with treemap plot:

# Total confirmed cases per country, largest first. Every row gets the same
# `parents` label so all treemap tiles share one parent node.
conf_df <- coronavirus %>%
  filter(type == "confirmed") %>%
  group_by(country) %>%
  summarise(total_cases = sum(cases)) %>%
  ungroup() %>%
  arrange(desc(total_cases)) %>%
  mutate(parents = "Confirmed")

# Treemap: tile values come from total_cases, tile labels from country.
conf_df %>%
  plot_ly(
    type = "treemap",
    values = ~total_cases,
    labels = ~country,
    parents = ~parents,
    domain = list(column = 0),
    name = "Confirmed",
    textinfo = "label+value+percent parent"
  )

課題用データ:

# Data for the assignment: the 2020-05-07 snapshot with World Bank and
# Google mobility columns, shown with copy / CSV / Excel export buttons.
datatable(
  wb_gmr_data2,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = c("copy", "csv", "excel"))
)